Skip to main content

Convert PDF to Text

  • Simple client-side code that converts a PDF to plain text in the browser
  • JS library (it exports the `pdfjs` object used by the function below):

"react-pdf": "^7.7.1"

Code

  • Form Code
<input type="file" name="pdfFile" onChange={extractText} accept=".pdf" />
  • Function
/**
 * Extracts the plain text of the PDF selected in a file `<input>`.
 *
 * Intended as an `onChange` handler. The file is exposed to pdf.js through a
 * temporary blob URL, every page is read in order, and the text items of each
 * page are joined with single spaces. Pages are separated by a newline so the
 * last word of one page does not fuse with the first word of the next.
 *
 * Relies on a `pdfjs` object being in scope (presumably
 * `import { pdfjs } from "react-pdf"` with its worker configured — confirm
 * against the surrounding module).
 *
 * @param {{ target: { files?: ArrayLike<Blob> | null } }} event - Change event
 *   of the file input.
 * @returns {Promise<string | undefined>} The extracted text, or `undefined`
 *   when no file was chosen, the PDF holds no extractable text, or reading it
 *   failed. Failures are logged with `console.error`; the promise never
 *   rejects.
 */
const extractText = async (event) => {
  const file = event.target.files?.[0];
  if (!file) {
    // The picker was dismissed without choosing a file; nothing to do.
    return undefined;
  }

  let blobUrl;
  try {
    blobUrl = URL.createObjectURL(file);
    const pdf = await pdfjs.getDocument(blobUrl).promise;
    const pageTexts = [];

    // pdf.js page numbers are 1-based.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const textContent = await page.getTextContent();
      pageTexts.push(textContent.items.map((item) => item.str).join(" "));
    }

    if (!pageTexts.some((pageText) => pageText.length > 0)) {
      // E.g. a scanned, image-only PDF: report it instead of returning "".
      console.error(
        "Error extracting text from PDF:",
        new Error("The PDF contains no extractable text"),
      );
      return undefined;
    }

    const extractedText = pageTexts.join("\n");
    console.log(extractedText);
    return extractedText;
  } catch (error) {
    console.error("Error extracting text from PDF:", error);
    return undefined;
  } finally {
    // Always release the blob URL, on success and on failure alike.
    if (blobUrl !== undefined) {
      URL.revokeObjectURL(blobUrl);
    }
  }
};